{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sample for using transformer with KFServing SDK "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The notebook shows how to use KFServing SDK to create InferenceService with transformer, predictor."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from kubernetes import client\n",
    "\n",
    "from kfserving import KFServingClient\n",
    "from kfserving import constants\n",
    "from kfserving import V1alpha2EndpointSpec\n",
    "from kfserving import V1alpha2PredictorSpec\n",
    "from kfserving import V1alpha2TransformerSpec\n",
    "from kfserving import V1alpha2PyTorchSpec\n",
    "from kfserving import V1alpha2CustomSpec\n",
    "from kfserving import V1alpha2InferenceServiceSpec\n",
    "from kfserving import V1alpha2InferenceService\n",
    "from kubernetes.client import V1Container\n",
    "from kubernetes.client import V1ResourceRequirements\n",
    "import kubernetes.client\n",
    "import os\n",
    "import requests \n",
    "import json\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define InferenceService with Transformer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Add predictor and transformer on the endpoint spec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION\n",
    "default_endpoint_spec = V1alpha2EndpointSpec(\n",
    "                          predictor=V1alpha2PredictorSpec(\n",
    "                            min_replicas=1,\n",
    "                            pytorch=V1alpha2PyTorchSpec(\n",
    "                              storage_uri='gs://kfserving-samples/models/pytorch/cifar10',\n",
    "                              model_class_name= \"Net\",\n",
    "                              resources=V1ResourceRequirements(\n",
    "                                  requests={'cpu':'100m','memory':'1Gi'},\n",
    "                                  limits={'cpu':'100m', 'memory':'1Gi'}))),\n",
    "                          transformer=V1alpha2TransformerSpec(\n",
    "                            min_replicas=1,\n",
    "                            custom=V1alpha2CustomSpec(\n",
    "                              container=V1Container(\n",
    "                              image='gcr.io/kubeflow-ci/kfserving/image-transformer:latest',\n",
    "                              name='user-container',\n",
    "                              resources=V1ResourceRequirements(\n",
    "                                  requests={'cpu':'100m','memory':'1Gi'},\n",
    "                                  limits={'cpu':'100m', 'memory':'1Gi'})))))\n",
    "    \n",
    "isvc = V1alpha2InferenceService(api_version=api_version,\n",
    "                          kind=constants.KFSERVING_KIND,\n",
    "                          metadata=client.V1ObjectMeta(\n",
    "                              name='cifar10', namespace='default'),\n",
    "                          spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create InferenceService with Transformer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Call KFServingClient to create InferenceService."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'apiVersion': 'serving.kubeflow.org/v1alpha2',\n",
       " 'kind': 'InferenceService',\n",
       " 'metadata': {'creationTimestamp': '2019-11-13T02:19:37Z',\n",
       "  'generation': 1,\n",
       "  'name': 'cifar10',\n",
       "  'namespace': 'default',\n",
       "  'resourceVersion': '6381215',\n",
       "  'selfLink': '/apis/serving.kubeflow.org/v1alpha2/namespaces/default/inferenceservices/cifar10',\n",
       "  'uid': 'b9e6b4a7-186d-4daa-aba0-18beb18563b2'},\n",
       " 'spec': {'default': {'predictor': {'minReplicas': 1,\n",
       "    'pytorch': {'modelClassName': 'Net',\n",
       "     'resources': {'limits': {'cpu': '100m', 'memory': '1Gi'},\n",
       "      'requests': {'cpu': '100m', 'memory': '1Gi'}},\n",
       "     'runtimeVersion': '0.2.0',\n",
       "     'storageUri': 'gs://kfserving-samples/models/pytorch/cifar10'}},\n",
       "   'transformer': {'custom': {'container': {'image': 'gcr.io/kubeflow-ci/kfserving/image-transformer:latest',\n",
       "      'name': 'user-container',\n",
       "      'resources': {'limits': {'cpu': '100m', 'memory': '1Gi'},\n",
       "       'requests': {'cpu': '100m', 'memory': '1Gi'}}}},\n",
       "    'minReplicas': 1}}},\n",
       " 'status': {}}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "KFServing = KFServingClient()\n",
    "KFServing.create(isvc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check the InferenceService"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                 READY      DEFAULT_TRAFFIC CANARY_TRAFFIC  URL                                               \n",
      "cifar10              False                                                                                        \n",
      "cifar10              False                                                                                        \n",
      "cifar10              False                                                                                        \n",
      "cifar10              False                                                                                        \n",
      "cifar10              True       100                             http://cifar10.default.example.com/v1/models/ci...\n"
     ]
    }
   ],
   "source": [
    "KFServing.get('cifar10', namespace='default', watch=True, timeout_seconds=120)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predict the image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "api_instance = kubernetes.client.CoreV1Api(kubernetes.client.ApiClient())\n",
    "service = api_instance.read_namespaced_service(\"istio-ingressgateway\", \"istio-system\", exact='true')\n",
    "cluster_ip = service.status.load_balancer.ingress[0].ip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "http://9.21.53.162/v1/models/cifar10:predict {'Host': 'cifar10.default.example.com'}\n",
      "[[-1.6099603176116943, -2.6461076736450195, 0.3284446597099304, 2.4825077056884766, 0.43524616956710815, 2.3108043670654297, 1.00056791305542, -0.4232763946056366, -0.5100947022438049, -1.797839641571045]]\n",
      "3\n"
     ]
    }
   ],
   "source": [
    "url = \"http://\" + cluster_ip + \"/v1/models/cifar10:predict\"\n",
    "headers = { 'Host': 'cifar10.default.example.com' }\n",
    "with open('./input.json') as json_file:\n",
    "    data = json.load(json_file)\n",
    "    print(url, headers)\n",
    "    response = requests.post(url, json.dumps(data), headers=headers)\n",
    "    probs = json.loads(response.content.decode('utf-8'))[\"predictions\"]\n",
    "    print(probs)\n",
    "    print(np.argmax(probs))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Delete the InferenceService"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'kind': 'Status',\n",
       " 'apiVersion': 'v1',\n",
       " 'metadata': {},\n",
       " 'status': 'Success',\n",
       " 'details': {'name': 'cifar10',\n",
       "  'group': 'serving.kubeflow.org',\n",
       "  'kind': 'inferenceservices',\n",
       "  'uid': 'b9e6b4a7-186d-4daa-aba0-18beb18563b2'}}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "KFServing.delete('cifar10', namespace='default')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
